
* ---------------------------------------------------------------------------
* Session setup, data load and analysis-sample restriction.
* Writes the restricted panel to the tempfile panel_dataset, which the later
* sections of this script reload.
* ---------------------------------------------------------------------------
set more off
clear all
* NOTE(review): mbketi is not referenced anywhere in the visible script.
tempfile mbketi 
tempfile panel_dataset

* Fixed seed so that the random draws made further down (sample, runiform)
* are reproducible from run to run.
set seed 2439
set maxvar 32767
set matsize 11000

* Forward slash as the directory separator: Stata accepts it on every
* platform, whereas a backslash is Windows-specific and can also act as a
* macro-escape character.
use Data/ReSTAT_dataset.dta, clear

* Declare the panel structure: subject identifier by month.
xtset sheno_num month 


********** Restrict to analysis sample 
keep if panel_sample == 1

*** Optional subsample restrictions; uncomment as needed: 
*keep if female == 1
*keep if distance_B == 1
*keep if hyperbolic_a == 1

save `panel_dataset', replace
********** KS statistic
* Outcome variables analysed by every section of this script.
local saving "saved_partner q11_6_1a_r_hs saved_f q11_6_1_r_hs saved_t q11_6_5_r_hs"

* One row per outcome. Column 1 holds the exact KS p-value, column 2 the KS
* statistic D. Cells start at 999 so that an unfilled row is easy to spot.
* For the first two outcomes (partner-bank savings) the tested quantity is
* the endline level rather than a change.
matrix U_actual = J(6, 2, 999)

local row = 0
foreach var of varlist `saving' {

	local ++row

	preserve

	* Subject-level mean of the outcome over post == 1 rows, then copied onto
	* every row of that subject.
	* NOTE(review): these means are taken before the monthlysurvey filter
	* below is applied, while the SP section of this script filters on
	* monthlysurvey first. Confirm the difference is intended.
	tempvar post_mean pre_mean
	bysort sheno: egen `post_mean' = mean(`var') if post == 1
	bysort sheno: egen endline_saving = max(`post_mean')
	drop `post_mean'

	* Same construction for the baseline (post == 0) rows.
	bysort sheno: egen `pre_mean' = mean(`var') if post == 0
	bysort sheno: egen baseline_saving = max(`pre_mean')
	drop `pre_mean'

	* Endline minus baseline, except for outcomes 1 and 2: partner savings is
	* not collected at baseline, so the endline mean itself is used.
	gen endline_change = cond(inlist(`row', 1, 2), endline_saving, endline_saving - baseline_saving)
	summarize endline_change

	* Reduce to one endline monthly-survey row per subject. Outcomes asked
	* only at endline can still be missing for a panel-sample subject who did
	* not answer in the surveys where the question was asked; drop those.
	keep if post == 1 & monthlysurvey == 1
	bysort sheno: keep if _n == 1
	drop if missing(endline_change)

	* Two-sample Kolmogorov-Smirnov test across treatment status.
	ksmirnov endline_change, by(tr2) exact
	matrix U_actual[`row', 1] = r(p_exact)
	matrix U_actual[`row', 2] = r(D)

	restore
}




********** CDF graphs
* For each outcome, plot the distribution of the subject-level change by
* treatment status and save the graph in the output folder.
* distplot is not a built-in Stata command; it must be installed separately.
use `panel_dataset', clear

* j tracks the position of the current outcome in the saving list.
local j = 1
foreach var of varlist `saving' {
	preserve

	* Subject-level mean of the outcome in the endline period, spread to all
	* rows of the subject.
	bysort sheno: egen x = mean(`var') if post == 1
	bysort sheno: egen e_mean_`var' = max(x) 
	drop x

	* Same for the baseline period.
	bysort sheno: egen x = mean(`var') if post == 0
	bysort sheno: egen b_mean_`var' = max(x) 
	drop x

	gen change_`var' = e_mean_`var' - b_mean_`var'
	* Partner savings (outcomes 1 and 2) is not collected at baseline, so the
	* endline mean is plotted instead of a change.
	replace change_`var' = e_mean_`var' if `j' == 1 | `j' == 2

	* One endline monthly-survey row per subject, non-missing outcome only.
	keep if monthlysurvey == 1 & post == 1
	bysort sheno: keep if _n == 1 
	drop if change_`var' == . 

	distplot change_`var', over(tr2) ytitle("F(X)") xtitle("Change in Mean Monthly Savings, IHS") legend(order(1 "Control" 2 "Treatment")) lpattern(solid dash) lcolor(black gray)
	* Forward slash keeps the path portable and keeps a backslash away from
	* the macro reference in the file name.
	graph save "output/cdf_`var'.gph", replace

	restore
	local j = `j' + 1
}

********** SP statistic
* For each outcome, estimate the share of random treatment-control pairs in
* which the treated subject has the larger value. Results go into the matrix
* Prob, one row per outcome (999 marks an unfilled row).

* Restrict the stored panel to monthly-survey rows. The tempfile is
* overwritten, so every later reload of panel_dataset sees this subset.
use `panel_dataset', clear
keep if monthlysurvey == 1 
save `panel_dataset', replace

local rounds = 1000
matrix Prob = J(6, 1, 999)
local turn = 0
foreach var of varlist `saving' {
	use `panel_dataset', clear
	local prob_change = 0
	local turn = `turn' + 1
	
	* Subject-level endline and baseline means, spread to all rows.
	bysort sheno: egen x = mean(`var') if post == 1
	bysort sheno: egen e_mean_`var' = max(x)
	drop x
	bysort sheno: egen x = mean(`var') if post == 0
	bysort sheno: egen b_mean_`var' = max(x)
	drop x
	gen change_`var' = e_mean_`var' - b_mean_`var'
	* Partner savings (outcomes 1 and 2) is not collected at baseline, so the
	* endline mean is used instead of a change.
	replace change_`var' = e_mean_`var' if `turn' == 1 | `turn' == 2
	
	* One endline row per subject, non-missing outcome only.
	keep if post == 1
	bysort sheno: keep if _n == 1
	drop if change_`var' == . 
	
	* Draw one observation per tr2 group in each round and count how often
	* treatment exceeds control. After sorting on tr2, row 1 is the lower tr2
	* value and row 2 the higher one.
	forvalues i = 1/`rounds' {
		preserve
		sample 1, count by(tr2)
		sort tr2
		if change_`var'[1] < change_`var'[2] {
			local prob_change = `prob_change' + 1
		}
		restore
	}
	* Share of rounds in which treatment > control.
	matrix Prob[`turn', 1] = `prob_change'/`rounds'
}


*** Bootstrapped probabilities (treatment and control are randomly assigned):
* For each outcome, treatment status is re-drawn at random many times; for
* each re-draw the share of random pairs in which the pseudo-treated subject
* has the larger value is stored in Prob_change_bootstrap
* (rows = re-draws, columns = outcomes, 999 marks an unfilled cell).
local loops = 1000 /* number of samples where we randomize treatment assignment */
local sample = 100 /* number of comparisons per randomly assigned treatment and control */
matrix Prob_change_bootstrap = J(`loops', 6, 999)
local count = 0
foreach var of varlist `saving' {
	use `panel_dataset', clear
	local count = `count' + 1

	* Subject-level endline and baseline means, spread to all rows.
	qui bysort sheno: egen x = mean(`var') if post == 1
	qui bysort sheno: egen e_mean_`var' = max(x)
	qui drop x
	qui bysort sheno: egen x = mean(`var') if post == 0
	qui bysort sheno: egen b_mean_`var' = max(x)
	qui drop x
	gen change_`var' = e_mean_`var' - b_mean_`var'
	* Outcomes 1 and 2 use the endline mean instead of a change.
	replace change_`var' = e_mean_`var' if `count' == 1 | `count' == 2

	* One endline row per subject, non-missing outcome only.
	qui keep if post == 1
	qui bysort sheno: keep if _n == 1
	qui drop if change_`var' == . 

	* Outer loop: one random treatment assignment per iteration.
	forvalues t = 1/`loops' {
		* Replace tr2 with a coin flip: 1 when the uniform draw exceeds .5.
		drop tr2
		qui gen tr2 = runiform()
		qui replace tr2 = (tr2 > .5)
		local prob_change = 0

		* Inner loop: draw one observation per pseudo-group and compare.
		* After sorting on tr2, row 1 is pseudo-control, row 2 pseudo-treatment.
		forvalues i = 1/`sample' {
			preserve
			sample 1, count by(tr2)
			sort tr2
			if change_`var'[1] < change_`var'[2] {
				local prob_change = `prob_change' + 1
			}
			restore
		}
		matrix Prob_change_bootstrap[`t',`count'] = `prob_change'/`sample'
	}
}



* Export the result matrices as variables and save them.
* NOTE(review): svmat adds its columns to whatever dataset is in memory at
* this point (the data left over from the preceding section of this script),
* so the saved file holds those leftover variables next to the results.
* Confirm that this is intended.

* KS test: significance level (column 1) and statistic (column 2)
svmat U_actual
* Share of draws in which the change is higher in treatment, using true treatment status
svmat Prob
* Bootstrapped shares under randomly assigned treatment; sort each column to
* locate the actual share and read off the appropriate p-value
svmat Prob_change_bootstrap
* To estimate the p-value:
*gsort -Prob_change_bootstrap#

* Forward slash as the directory separator so the path works on every platform.
save output/graph_saving.dta, replace 

